﻿// This data ingestion pipeline works with the output from the DataGenerator project.
//
// The reason for separating data generation from ingestion is:
//
//  - For realism. In real-world scenarios, you will be taking data from external sources,
//    such as existing business data or documents provided by product manufacturers, and need
//    to ingest that data into your system. You must only ingest the data you will really have
//    (e.g., final PDFs) and not artificially-available data such as the intermediate JSON
//    files generated by the DataGenerator.
//
//  - For evaluation of different strategies. There are many possible ways to represent the
//    ingested data (for example, how to chunk and embed text), and we want to be able to
//    try out different ingestion strategies and then numerically evaluate the quality of the
//    end-to-end system.
//
// In this case the data ingestor does *not* write directly to DBs, but instead emits .json
// files that the Backend project can quickly import. This is because data ingestion can be
// a lengthy process, requiring dependencies, and we want each person trying out this sample
// to be able to run the app without going through the ingestion process first.
// In real-world cases that's likely useful too, since you can't normally write directly to
// production DBs.

// Entry point. Takes the path to the generated data as the first command-line argument,
// validates it, and then runs each ingestor in turn, writing output under
// "<solution directory>/seeddata/dev".
//
// On invalid input the process reports the problem on stderr and exits with code 1, so that
// scripts or CI steps invoking this tool can detect that nothing was ingested.
var generatedDataPath = args.Length > 0 ? args[0] : null;
if (string.IsNullOrEmpty(generatedDataPath))
{
    Console.Error.WriteLine("Usage: DataIngestor [path_to_generated_data]");
    Environment.ExitCode = 1;
    return;
}

if (!Directory.Exists(generatedDataPath))
{
    Console.Error.WriteLine($"Directory not found: {generatedDataPath}. You may need to run the data generator first.");
    Environment.ExitCode = 1;
    return;
}

// PathUtils is defined elsewhere in the project; presumably it locates the nearest ancestor
// directory that contains a solution file - confirm against its implementation.
var solutionDir = PathUtils.FindAncestorDirectoryContaining("*.sln");
var outputDir = Path.Combine(solutionDir, "seeddata", "dev");

// Safe to call when the directory already exists.
Directory.CreateDirectory(outputDir);

// Each ingestor is awaited to completion before the next one starts.
await new TicketIngestor().RunAsync(generatedDataPath, outputDir);
await new ProductCategoryIngestor().RunAsync(generatedDataPath, outputDir);
await new ProductIngestor().RunAsync(generatedDataPath, outputDir);
await new EvalQuestionIngestor().RunAsync(generatedDataPath, outputDir);
await new ManualIngestor().RunAsync(generatedDataPath, outputDir);
await new ManualZipIngestor().RunAsync(generatedDataPath, outputDir);
